package com.atguigu.bigdata.spark.core.rdd.operator.transform;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import java.util.Arrays;
import java.util.List;

/**
 * Demonstrates the {@code distinct} transformation (de-duplication) on a
 * {@link JavaRDD} using a local Spark context.
 */
public class Spark09_RDD_Operator_Transform_JAVA {

    /**
     * Builds a two-partition RDD from a list that contains duplicate integers,
     * applies {@code distinct()} and prints the collected result to standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // distinct operator: de-duplication
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("sparkCore");

        // try-with-resources guarantees the context is shut down even when a job
        // fails; JavaSparkContext.close() delegates to stop().
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            List<Integer> list = Arrays.asList(1, 2, 3, 4, 1, 2, 3, 4);

            // Distribute the input across 2 partitions.
            JavaRDD<Integer> rdd = sc.parallelize(list, 2);

            JavaRDD<Integer> distinct = rdd.distinct();

            // println(Object) already renders the list via String.valueOf, so no
            // explicit toString() call is required.
            System.out.println(distinct.collect());
        }
    }
}
